/******************************************************************************
This file cleans NYC middle school applications data and merges it with
demographic and test score data.
--------------------------------------------------------------------------------
Input:			Raw NYC MS application data (one file per year)
					> "${cleandata}match/MiddleSchoolAdmissions/`year'_MS_Admissions_scrambled.dta"
				Cleaned demographic data
					> "${cleandata}/demo_all_years_ms.dta"
				Cleaned enrollment dataset
					> "${cleandata}enroll_data_MS.dta"
				Cleaned test data
					> "${cleandata}test-Biog_appended.dta"
--------------------------------------------------------------------------------
Output:			Cleaned applications, all years appended
					> "${cleandata}application_all.dta"
				Applications merged with demographic data
					> "${cleandata}application_all_bio.dta"
				"${cleandata}application_demo_test.dta"
--------------------------------------------------------------------------------*/
clear all
set more off

* Step switches: 1 runs the step, any other value skips it
local application_data 	1 // Clean the yearly application files and append them into one dataset
local demo_test 		1 // Merge the appended application data with demographic data

* First and last application year to process (inclusive)
local fy = 2016
local ly = 2018

* Empty accumulator that the yearly files are appended to.
* It is saved with -emptyok- (no observations, no variables) so that no
* placeholder row can leak into the appended data.
tempfile append
save `append', emptyok

* ------------------------------------------------------------------------------
* Step 1: read each year's raw application file, harmonise it, keep 5th grade
* applicants, append all years and save "${cleandata}application_all.dta".
* Relies on the tempfile `append' created before this block.
* ------------------------------------------------------------------------------
if `application_data' == 1 {

	forvalues year = `fy'/`ly' {

		use "${cleandata}match/MiddleSchoolAdmissions/`year'_MS_Admissions_scrambled.dta", clear

		* The 2019 branch reads one numeric column per talent. Reshape those into the
		* talent1/talent1score and talent2/talent2score layout that the -order-
		* list below expects.
		if `year' == 2019 {

			gen talent1 = ""
			gen talent1score = .
			gen talent2 = ""
			gen talent2score = .

			* Parallel lists: k-th variable <-> k-th label. The list order decides
			* which talent fills slot 1 and which fills slot 2.
			local talent_vars   talent_art talent_athletics talent_computer_math talent_dance talent_drama talent_cw_journalism talent_media talent_science talent_strings talent_winds talent_vocal
			local talent_labels Art Athletics Computer/Math Dance Drama CW/Journalism Media Science Strings Winds Vocal
			local n_talents : word count `talent_vars'

			forvalues i = 1/2 {
				forvalues k = 1/`n_talents' {
					local tvar   : word `k' of `talent_vars'
					local tlabel : word `k' of `talent_labels'

					* take the first still-unassigned talent with a score ...
					cap replace talent`i' = "`tlabel'" if `tvar' != . & talent`i' == ""
					replace talent`i'score = `tvar' if talent`i' == "`tlabel'"
					* ... and blank it out so the next slot cannot pick it again
					replace `tvar' = . if talent`i' == "`tlabel'"
				}
			}
			drop `talent_vars'
		}

		* Rename / change type of variables.
		* -cap- lets a line pass silently when it does not apply to this year's file.
		cap rename student_id_scram stu
		cap replace roundmatched = "" if roundmatched == "." | roundmatched == "X391Z"
		cap destring roundmatched, replace
		forvalues i = 1/70 {
			cap replace matchprioritygroup`i' = "" if matchprioritygroup`i' == "-"
			cap destring matchprioritygroup`i', replace
		}
		cap tostring nsmp, replace
		cap tostring nsmpdbn, replace

		* keep 5th grade applicants
		keep if currentgrade == 5
		cap drop grade

		* tag the application year and add the years processed so far
		cap drop year
		gen year = `year'
		append using `append'
		save `append', replace

	}

	* order vars
	* The repetitive parts of the column order are assembled in locals; the
	* resulting sequence is the same as the original hand-written list.
	local choice_vars
	forvalues k = 1/12 {
		local choice_vars `choice_vars' programname`k' programcode`k' programtype`k' schooldbn`k' choice`k' rank`k' compositescore`k' matchprioritygroup`k' eligible`k' matcheligibleyn`k'
	}

	local ns_vars
	forvalues k = 1/4 {
		local ns_vars `ns_vars' nsprogrampriority`k' nsprogramcode`k' nsprogramname`k' nsschooldbn`k' nsprogramtype`k' nspclrank`k'
	}

	local appeal_vars
	forvalues k = 1/5 {
		local appeal_vars `appeal_vars' appealprogrampriority`k' appealprogramcode`k' appealprogramname`k' appealschooldbn`k' appealprogramtype`k' appealpclrank`k' appealcompositescore`k'
	}
	* appeal choices 6-12 have an irregular set of columns, so they are spelled out
	local appeal_vars `appeal_vars' ///
		appealprogrampriority6 appealprogramcode6 appealprogramname6 appealschooldbn6 appealprogramtype6 ///
		appealprogrampriority7 appealprogramcode7 appealprogramname7 appealschooldbn7 appealprogramtype7 appealcompositescore7 ///
		appealprogrampriority8 appealprogramcode8 appealprogramname8 appealschooldbn8 appealprogramtype8 appealpclrank8 ///
		appealprogrampriority9 appealprogramcode9 appealprogramname9 appealschooldbn9 appealprogramtype9 ///
		appealprogrampriority10 appealprogramcode10 appealprogramname10 appealschooldbn10 appealprogramtype10 appealpclrank10 ///
		appealprogrampriority11 appealprogramcode11 appealprogramname11 appealschooldbn11 appealprogramtype11 ///
		appealprogrampriority12 appealprogramcode12 appealprogramname12 appealschooldbn12 appealprogramtype12

	* one command: every physical line except the last ends in ///
	order year stu finaloffer finalofferdbn mrmatch mrmatchdbn mrmp mrmpdbn ///
		nsmatch nsmatchdbn nsmp nsmpdbn nsofferresponse ///
		appealmatch appealmatchdbn appealmp appealmpdbn ///
		eszoneddbn zonedschooldbn futureeszone futuremszone futuremszonedistrict resdistrict ///
		currentgrade sped currentschooldbn ///
		elaalttestname elaalttestreadscore elaalttestwritingscore elaproficiencyrating ///
		mathalttestname mathalttestscore mathproficiencyrating ///
		nyseslattestdate accessiblesiterequired readingcategory ///
		dayspresent daysabsent dayslate absent late math ela ss sc ///
		timemanagementindependence organization perseverance asksforhelp respectsschoolrulescollaboration ///
		talent1 talent1score talent2 talent2score iep manuallyplacedapplicationchoice ///
		finalprogramcode finalprogramname finalschooldbn finalschoolname roundmatched tiebreaker ///
		`choice_vars' ///
		nstiebreaker `ns_vars' ///
		appealtiebreaker `appeal_vars' ///
		optout

	* drop people with no final offer:
	*   - before 2019: always (this also covers the narrower "no final offer and
	*     no tiebreaker" case)
	*   - any year: when there is no first appeal program code either
	drop if (finalofferdbn == "-" | finalofferdbn == "") & year < 2019
	drop if (finalofferdbn == "-" | finalofferdbn == "") & appealprogramcode1 == ""

	* correct the final offers for 2019 for people that have appealed and not succeeded:
	* fall back to the main-round match, then to the mrmp fields
	replace finalofferdbn = mrmatchdbn if (finalofferdbn == "-" | finalofferdbn == "") & mrmatchdbn != "-" & year == 2019
	replace finalofferdbn = mrmpdbn if (finalofferdbn == "-" | finalofferdbn == "") & mrmpdbn != "-" & year == 2019
	replace finaloffer = mrmatch if (finaloffer == "-" | finaloffer == "") & mrmatch != "" & year == 2019
	replace finaloffer = mrmp if (finaloffer == "-" | finaloffer == "") & mrmp != "" & year == 2019

	* recode final offer and main-round match to empty when missing ("-")
	replace finalofferdbn = "" if finalofferdbn == "-"
	replace mrmatchdbn = "" if mrmatchdbn == "-"
	replace finaloffer = "" if finaloffer == "-"
	replace mrmatch = "" if mrmatch == "-"

	* get school's district (first two characters of the DBN).
	* Done after the corrections above so district_offered reflects the final,
	* cleaned finalofferdbn rather than a "-" placeholder.
	gen current_district = substr(currentschooldbn, 1, 2)
	gen district_offered = substr(finalofferdbn, 1, 2)

	save "${cleandata}application_all.dta", replace
}

* merge with demographics

* ------------------------------------------------------------------------------
* Step 2: for each application year, attach to every applicant the demographic
* record from the closest available year, append the years and save
* "${cleandata}application_all_bio.dta".
* ------------------------------------------------------------------------------
if `demo_test' == 1 {

	* Empty accumulator (no observations, no variables), so that no placeholder
	* row with year == . ends up in the saved dataset.
	clear all
	tempfile append
	save `append', emptyok

	forvalues year = `fy'/`ly' {

		use "${cleandata}application_all.dta", clear
		keep if year == `year'
		* free the name "year" for the demographic file's own year variable
		rename year year_match

		tostring stu, replace

		merge 1:m stu using "${cleandata}/demo_all_years_ms.dta", keepusing(sex ethnicity birth_mm_yyyy ell swd poverty home_lang pob_code female asian black hispanic nat_american white other fr_lunch disability year)
		* demographic records of students who did not apply this year
		drop if _merge == 2

		* keep closest year available for each stu (with preference for year before):
		* aux is the distance in years, plus a 0.5 penalty when the demographic
		* year lies after the application year, so at equal distance the earlier
		* year wins. Applicants without any demographic record have aux missing
		* on their single row and are kept (missing == missing is true in Stata).
		gen aux = abs(year_match - year)
		replace aux = aux + 0.5 if year_match - year < 0
		bys stu: egen aux1 = min(aux)
		keep if aux1 == aux

		drop aux*
		drop _merge
		drop year

		* remove rows that are exact copies of each other
		duplicates drop
		rename year_match year
		append using `append'
		save `append', replace

	}
	sort year

	* save the dataset
	save "${cleandata}application_all_bio.dta", replace
}
